# adding new column: party ------------------------------
# Label each contribution with its candidate's party. Only three candidates
# are named explicitly; every other non-missing candidate name is labelled
# "Republican".
nc$party <- local({
  candidate <- nc$cand_nm
  party <- ifelse(candidate == "Obama, Barack", "Democratic", "Republican")
  # A length-one replacement skips NA positions in the index, so a missing
  # candidate name stays NA, just as it does coming out of ifelse().
  party[candidate == "Johnson, Gary Earl"] <- "Libertarian"
  party[candidate == "Stein, Jill"] <- "Green"
  as.factor(party)
})
# fixing dates ------------------------------------------
# Parse the receipt date from day / abbreviated month name / two-digit year
# into a Date. Note that %b is matched against the month abbreviations of the
# current locale.
nc[["contb_receipt_dt"]] <- as.Date(
  nc[["contb_receipt_dt"]],
  format = "%d-%b-%y"
)
# fixing zip codes and adding three digit zip column ----
# Keep only the first five characters of each zip code (this drops the
# extended-format suffix) and store the result as a factor.
nc$contbr_zip <- as.factor(substr(nc$contbr_zip, 1, 5))
# The three-character prefix of the trimmed zip; this column stays character.
nc$threedigit_zip <- substr(nc$contbr_zip, 1, 3)
# fixing city names -------------------------------------
options(max.print = 500000)
# Normalise misspelled / decorated city names with an ordered list of
# (regex pattern, replacement) rules. Rules run top to bottom with gsub(), so
# a later rule sees the output of every earlier one; keep the order intact.
# Unanchored patterns match anywhere inside the city string.
#
# Rules whose pattern is a prefix of their own replacement must be anchored,
# otherwise an already-correct value is corrupted (e.g. "AURORA" would become
# "AURORAA", and "APE" would also hit "CAPE ..." and "CHAPEL").
nc$contbr_city <- local({
  city_rules <- matrix(
    c(
      "^WINSTON.*SALEM$", "WINSTON-SALEM",
      "^APE$", "APEX",
      "^A.*VILLE.*", "ASHEVILLE",
      "ASHEVILE", "ASHEVILLE",
      "ASHEVILLLE", "ASHEVILLE",
      "^ATLANTIC B.*", "ATLANTIC BEACH",
      "^AUROR$", "AURORA",
      "B ELMONT", "BELMONT",
      "^BALD HEAD ISL$", "BALD HEAD ISLAND",
      "^BLACK M.*", "BLACK MOUNTAIN",
      "BOONE, NC", "BOONE",
      "BROWN SUMMIT", "BROWNS SUMMIT",
      "CHAR.*OT.*", "CHARLOTTE",
      "CHAROLETTE", "CHARLOTTE",
      "CHA.*LOTTE", "CHARLOTTE",
      "^CHAP.*ILL.*", "CHAPEL HILL",
      "CH.*HILL", "CHAPEL HILL",
      # The next rule also expands an already-complete "CHAPEL HILL"; the
      # "HILL HILL" rule two lines below undoes that.
      "CHAPEL", "CHAPEL HILL",
      "CAPEL HILL", "CHAPEL HILL",
      "^CHAPEL HILL HILL$", "CHAPEL HILL",
      "CAROLINA B.*", "CAROLINA BEACH",
      "CARR.*", "CARRBORO",
      "CARY,", "CARY",
      "CEDAR M.*", "CEDAR MOUNTAIN",
      "CLEMMONS,", "CLEMMONS",
      "CONNELLY.*", "CONNELLY SPRINGS",
      "CORNELUS", "CORNELIUS",
      "DU.*HAM.*", "DURHAM",
      "DYRHAM", "DURHAM",
      "E FLAT ROCK", "EAST FLAT ROCK",
      "EDENTON-", "EDENTON",
      "ELIZABETH CTY", "ELIZABETH CITY",
      "^FORT\\+BRAGG$", "FORT BRAGG",
      ".*QUAY.*VARINA", "FUQUAY-VARINA",
      "GASTONIA.*", "GASTONIA",
      "GRE.*RO.*", "GREENSBORO",
      "GREENVILLLE", "GREENVILLE",
      "HENDERSONV.*", "HENDERSONVILLE",
      "HGILLSBOOROUGH", "HILLSBOROUGH",
      "HICKORY.*", "HICKORY",
      "HIGH P.*", "HIGH POINT",
      "HILLSB.*", "HILLSBOROUGH",
      "HOPE MILLS,", "HOPE MILLS",
      "HUNTER.*", "HUNTERSVILLE",
      "JACKSONVILLE,", "JACKSONVILLE",
      # Corrects the missing "S" as well as stripping the ", NC" suffix.
      "KERNERVILLE(, NC)?", "KERNERSVILLE",
      "KILL DEVIL.*", "KILL DEVIL HILLS",
      "KINGS.*", "KINGS MOUNTAIN",
      "LEICESER", "LEICESTER",
      "MARS.*HILL", "MARS HILL",
      "MIRRISVILLE", "MORRISVILLE",
      "MT.*PLEASANT", "MOUNT PLEASANT",
      "MT.*ULLA", "MOUNT ULLA",
      "MT.*AIRY", "MOUNT AIRY",
      "MT.*GILEAD", "MOUNT GILEAD",
      "NEW.*BERN", "NEW BERN",
      "NORTH TOPSAIL.*", "NORTH TOPSAIL BEACH",
      "OCEAN ISL BCH", "OCEAN ISLE BEACH",
      "PILOT.*", "PILOT MOUNTAIN",
      "PITTSBORRO", "PITTSBORO",
      "RA.*LEI.*", "RALEIGH",
      "RALIEGH", "RALEIGH",
      "REIDSVILLE.*", "REIDSVILLE",
      "ROANOKE RAPID.*", "ROANOKE RAPIDS",
      "ROCKY M.*", "ROCKY MOUNT",
      "SCALY MTN", "SCALY MOUNTAIN",
      "SOUTH.*P.*NES", "SOUTHERN PINES",
      "WAK.*FOREST", "WAKE FOREST",
      "WIL.*MINGTON", "WILMINGTON",
      "WIMGATE", "WINGATE"
    ),
    ncol = 2,
    byrow = TRUE,
    dimnames = list(NULL, c("pattern", "replacement"))
  )

  # gsub() works on character data, so convert once up front.
  city <- as.character(nc$contbr_city)
  for (i in seq_len(nrow(city_rules))) {
    city <- gsub(
      city_rules[i, "pattern"],
      city_rules[i, "replacement"],
      city
    )
  }
  # setting the variable as a factor again
  as.factor(city)
})
## 'data.frame': 156828 obs. of 20 variables:
## $ cmte_id : Factor w/ 14 levels "C00410118","C00431171",..: 7 3 3 3 3 3 3 3 3 3 ...
## $ cand_id : Factor w/ 13 levels "P00003608","P20002523",..: 11 12 12 12 12 12 12 12 12 12 ...
## $ cand_nm : Factor w/ 13 levels "Bachmann, Michele",..: 7 6 6 6 6 6 6 6 6 6 ...
## $ contbr_nm : Factor w/ 34948 levels "AANSTAD, JUDY A.",..: 26321 7475 3960 34806 17911 15819 13432 33974 31187 19314 ...
## $ contbr_city : Factor w/ 767 levels "","]","ABERDEEN",..: 214 545 740 584 19 127 282 588 453 195 ...
## $ contbr_st : Factor w/ 1 level "NC": 1 1 1 1 1 1 1 1 1 1 ...
## $ contbr_zip : Factor w/ 953 levels "","*2704","0",..: 754 265 695 503 916 564 609 504 803 295 ...
## $ contbr_employer : Factor w/ 12695 levels ""," COASTAL MEDICAL TRANSPORT, INC",..: 8087 12005 9108 438 1485 9365 9108 6965 4186 3449 ...
## $ contbr_occupation: Factor w/ 7337 levels "","12K ADVOCATE",..: 5622 7118 5539 6775 6659 441 5539 4612 1054 4612 ...
## $ contb_receipt_amt: num 87.5 500 175 50 100 1000 50 250 75 100 ...
## $ contb_receipt_dt : Date, format: "2011-12-06" "2011-09-30" ...
## $ receipt_desc : Factor w/ 19 levels "","ATTRIBUTION TO PARTNERS REQUESTED / REDESIGNATION REQUESTED",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ memo_cd : Factor w/ 2 levels "","X": 1 1 1 1 1 1 1 1 1 1 ...
## $ memo_text : Factor w/ 71 levels "","*","* EARMARKED CONTRIBUTION: SEE BELOW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ form_tp : Factor w/ 3 levels "SA17A","SA18",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ file_num : int 779227 756218 756218 756218 756218 756218 756218 756218 756218 756218 ...
## $ tran_id : Factor w/ 138767 levels "0079331-0001",..: 3636 7655 7395 7119 6961 7542 6506 7255 7029 6344 ...
## $ election_tp : Factor w/ 6 levels "","G2008","G2012",..: 6 6 6 6 6 6 6 6 6 6 ...
## $ party : Factor w/ 4 levels "Democratic","Green",..: 4 1 1 1 1 1 1 1 1 1 ...
## $ threedigit_zip : chr "285" "276" "284" "281" ...
## [1] "Bachmann, Michele" "Cain, Herman"
## [3] "Gingrich, Newt" "Huntsman, Jon"
## [5] "Johnson, Gary Earl" "Obama, Barack"
## [7] "Paul, Ron" "Pawlenty, Timothy"
## [9] "Perry, Rick" "Roemer, Charles E. 'Buddy' III"
## [11] "Romney, Mitt" "Santorum, Rick"
## [13] "Stein, Jill"
Names of the candidates involved in 2012 Presidential Election. Used as.factor() to convert several variables into factors in order to examine them fully.
## cmte_id cand_id cand_nm
## C00431445:106847 P80003338:106847 Obama, Barack :106847
## C00431171: 40665 P80003353: 40665 Romney, Mitt : 40665
## C00495820: 3909 P80000748: 3909 Paul, Ron : 3909
## C00496497: 1510 P60003654: 1510 Gingrich, Newt: 1510
## C00496034: 1105 P20002721: 1105 Santorum, Rick: 1105
## C00496067: 558 P00003608: 558 Cain, Herman : 558
## (Other) : 998 (Other) : 998 (Other) : 998
## contbr_nm contbr_city contbr_st
## SNEED, DAVID MR.: 290 CHARLOTTE :17727 NC:155592
## VAUGHAN, NGAT : 202 RALEIGH :15650
## BRAZDA, SHIRLEY : 128 DURHAM :12116
## JACKSON, PAUL : 107 CHAPEL HILL:10278
## SQUIRE, EDWARD : 105 GREENSBORO : 7122
## KOPETZ, VINETTE : 104 CARY : 4979
## (Other) :154656 (Other) :87720
## contbr_zip contbr_employer
## 27514 : 3491 RETIRED :36808
## 27517 : 3361 SELF-EMPLOYED :13986
## 27516 : 3261 NOT EMPLOYED :11051
## 27705 : 3201 INFORMATION REQUESTED PER BEST EFFORTS: 4189
## 27713 : 2551 INFORMATION REQUESTED : 3750
## 27707 : 2494 (Other) :85751
## (Other):137233 NA's : 57
## contbr_occupation contb_receipt_amt
## RETIRED :40110 Min. : 0.12
## HOMEMAKER : 4293 1st Qu.: 25.00
## PHYSICIAN : 4201 Median : 50.00
## INFORMATION REQUESTED PER BEST EFFORTS: 4000 Mean : 171.35
## ATTORNEY : 3656 3rd Qu.: 100.00
## (Other) :99319 Max. :10000.00
## NA's : 13
## contb_receipt_dt receipt_desc
## Min. :2007-12-21 :155199
## 1st Qu.:2012-07-06 REDESIGNATION FROM PRIMARY : 124
## Median :2012-09-13 REATTRIBUTION FROM SPOUSE : 79
## Mean :2012-08-04 REATTRIBUTION / REDESIGNATION REQUESTED: 74
## 3rd Qu.:2012-10-16 SEE REATTRIBUTION : 46
## Max. :2012-12-19 REATTRIBUTED : 18
## (Other) : 52
## memo_cd memo_text form_tp
## :116887 :116536 SA17A:117165
## X: 38705 * OBAMA VICTORY FUND 2012 : 21118 SA18 : 38427
## TRANSFER FROM ROMNEY VICTORY INC. : 17135 SB28A: 0
## REDESIGNATION FROM PRIMARY : 124
## * EARMARKED CONTRIBUTION: SEE BELOW: 113
## * : 96
## (Other) : 470
## file_num tran_id election_tp party
## Min. :723511 SA17.769381: 3 : 0 Democratic :106847
## 1st Qu.:810684 SA17.781633: 3 G2008: 0 Green : 30
## Median :821325 SA17.788551: 3 G2012:88585 Libertarian: 273
## Mean :832367 SA17.798308: 3 O2012: 74 Republican : 48442
## 3rd Qu.:842943 SA17.837858: 3 P2008: 2
## Max. :992728 SA17.859296: 3 P2012:66931
## (Other) :155574
## threedigit_zip
## Length:155592
## Class :character
## Mode :character
##
##
##
##
There is only one contributor above $5000. Since the data is long-tailed, I adjusted the binwidth and x-axis in an attempt to gain a better understanding of the contribution distribution. By reducing the binwidth to address bias, the variance increases, but we can see that contributions are somewhat bimodal around $50 and $100. The huge dropoff between values shows how people generally donate values that are regular numbers ($50, $100, $150, $200, $250).
For Obama, we can see that most contributions are well under $500 with a mode below $50. The histogram shows a mode under $50. For Romney, the mode is above $100 with a significant amount of individuals donating around $250.
The data is unimodal around October 2012, which was the peak of election season right before Election Day in November. Since the receipt date officially reflects the date the contribution is relinquished from the contributor, the peak in October makes sense; campaigns would certainly utilize the most money around that time of year for the final push.
## Bachmann, Michele Cain, Herman
## 412 559
## Gingrich, Newt Huntsman, Jon
## 1536 36
## Johnson, Gary Earl Obama, Barack
## 273 107631
## Paul, Ron Pawlenty, Timothy
## 3954 20
## Perry, Rick Roemer, Charles E. 'Buddy' III
## 90 146
## Romney, Mitt Santorum, Rick
## 41031 1110
## Stein, Jill
## 30
## Source: local data frame [2 x 2]
##
## contb_receipt_amt <= 0 n
## (lgl) (int)
## 1 FALSE 155592
## 2 TRUE 1236
There exist a good amount of contributors with receipt amounts of either 0 or negative values (1236). These exist to show contribution checks that were bounced or returned. For the sake of calculations, these rows will be excluded. Most campaign contributors in North Carolina gave to Barack Obama (106,847) with Mitt Romney at a distant second (40,665).
## [1] 10607642
## [1] 15978160
However, the total amount donated to the Democratic party ($10,607,642) is significantly less than that donated to Republicans ($15,978,160).
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 19.00 40.00 99.28 100.00 10000.00
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.12 50.00 100.00 358.10 250.00 5000.00
Doing some simple mathematics, Democrats donated about $99.28 each on average while Republicans averaged about $329.84 per contributor. Median contribution might be a better measure of central tendency because of the sheer amount of outliers. That being said, Democrats had a median contribution of $40 while Republicans had a median contribution of $100.
## 27514 27517 27516 27705 27713 27707 28277 27615 27410
## 3522 3381 3293 3210 2573 2505 2291 2126 1984
## 28211 27612 28226 27613 28803 27106 27609 28804 28210
## 1726 1714 1612 1591 1517 1443 1437 1412 1398
## 27614 27519 27513 27312 27712 27104 27278 27511 27510
## 1381 1357 1353 1319 1285 1245 1237 1209 1196
## 28270 27608 28207 27408 27587 27701 28269 27455 27518
## 1193 1188 1177 1135 1127 1123 1078 1069 1065
## 27607 28173 28205 27858 28078 28036 28031 28374 28409
## 1039 1006 954 941 899 858 857 835 808
## 27604 28405 27539 27603 28739 27606 28105 27610 28209
## 798 794 785 764 761 757 753 730 727
## 28712 28601 28303 27406 28805 28607 28117 27502 28411
## 721 717 716 708 708 703 694 687 686
## 28403 28203 27703 27560 27284 28104 27407 28806 28461
## 679 678 660 653 644 627 626 603 602
## 28562 27540 27103 28202 28311 27616 28387 28412 27403
## 602 597 592 582 576 562 557 551 550
## 27704 28801 27262 27617 27529 28216 28215 27012 27605
## 549 541 535 535 534 533 532 531 527
## 28711 27526 27215 28027 27127 28704 28227 28025 28560
## 526 511 502 499 496 486 479 458 455
## (Other)
## 54780
## [1] 0.2715339
## [1] 0.1492436
## [1] 0.1832767
The top five zipcodes for contributors are all from the Triangle area, specifically Durham and Chapel Hill. The total contribution from the Triangle area (Durham, Chapel Hill, Raleigh, and Cary) amounts to $7,239,282.72, which is 27.15% of the total contribution by the state. Charlotte, the most populous city in NC, contributed $3,978,937.01, or about 14.92% of the total contribution. Even if the 10 largest cities comprising the Charlotte Metropolitan area are included, the contribution ($4,643,388.72, 18.33%) still pales in comparison to the Triangle. The concentration of the state’s top universities (NC State, UNC Chapel Hill, and Duke) in the Triangle area might contribute to the difference.
These are just a few bar graphs to show the distribution of candidates and parties from contributions. Again, the Democratic party and Obama garnered the most amount of contributions.
Through the FEC website, I chose to examine the state of North Carolina and its financial contributions during the 2012 presidential campaign (Republicans eventually took the state with 50.39% over Democrats’ 48.35%). There were 156,828 contributors with 20 (18 plus two I created) features. None of the variables are ordered factor variables; they are either categorical or quantitative.
The plurality of contributors are retired, self-employed, or unemployed. Campaign contributions are capped at $2,700 per election to a Federal candidate. However, individuals can give up to $5,000 to Political Action Committees (PACs) or up to $10,000 to the State or local party committee.
The main features in this data set are contribution amount and candidate/party affiliation. I would like to examine which variables determine the amount of contribution towards a specific candidate or political party. This is probably some combination of geographic region (zipcode, city) and the candidate or party him/herself.
It would be interesting to look at the occupations and employers of contributors who are in the workforce. Perhaps this could also provide some insight as to what type of companies support each candidate. Even though Republican donors gave fewer contributions, the amount contributed was far greater than their Democratic counterparts. Subsetting the data by party will allow for more exploration into this discrepancy.
A new column called party was created to represent each candidate’s political party. A majority were Republican with the exceptions of Barack Obama (D), Gary Johnson (Libertarian), and Jill Stein (Green). Even though there were 10 Republican candidates, Obama and the Democratic party received the greatest number of contributions (107631). Republicans outside of Mitt Romney received a total of less than 8,000 contributions. The Libertarian (273) and Green (30) parties received comparably minuscule amounts. Another new column called threedigit_zip was created since the first three digits of a zipcode specify a city or divide a larger city into several smaller parts. Sometimes, five-digit zipcodes are too specific, as some cities can have numerous five-digit zipcodes with identical first three digits.
Barack Obama dominated campaign contributions, but Mitt Romney won the state with 50.39% of the popular vote, showing a discrepancy between funding and candidate popularity. There also exist some negative campaign contributions up to -$5,000, which are nonsensical. Zipcodes were often in the extended format, so I amended these to only show 5 digits. In addition, many rows had incorrectly spelled city names (Asheville, Ashevile, Ashville), so I corrected these fields with regex. This would give me a more accurate look into which areas of NC generally supported which candidate. The dates were in a nonstandard format, so I converted them into YYYY-MM-DD format using as.Date in order to fully utilize this column.
The top two candidates, Obama and Romney have an extraordinary amount of outliers. This is also reflected in Republicans and Democrats. Perhaps an indepth exploration of quantiles will show the exact amount of outliers.
## 0% 25% 50% 75% 100%
## 1 19 40 100 10000
## Source: local data frame [2 x 2]
##
## (contb_receipt_amt > (obamaiqr * 1.5)... n
## (lgl) (int)
## 1 FALSE 96696
## 2 TRUE 10935
Obama’s contributors averaged around $99.28 with an Interquartile Range of $81 (100 - 19). There are 10,935 outliers out of the total 106,847 Obama contributors, i.e., those who donated more than $221.50 (1.5 * 81 + 100).
## 0% 25% 50% 75% 100%
## 0.12 50.00 100.00 250.00 5000.00
## Source: local data frame [2 x 2]
##
## (contb_receipt_amt > (obamaiqr * 1.5)... n
## (lgl) (int)
## 1 FALSE 25362
## 2 TRUE 15669
Romney’s contributors had a median contribution of $100 with an IQR of $200 (250 - 50). There are 15,669 outliers out of the total 40,665 Romney contributors, i.e., those who donated more than $550 (1.5 * 200 + 250).
## 0% 25% 50% 75% 100%
## 25 250 400 1750 5000
Rick Perry contributors had the highest median contribution of $400 with a 75th percentile of $1,750. It seems that Perry supporters donated in larger chunks than other candidates.
Faceting by candidate does show that the number of contributions increased for the two relevant candidates, Obama and Romney, up to Election Day (November 6, 2012). For other Republican candidates, the level of contribution rose during their campaigns and fell after they were not selected through the Primary or decided to drop out of the race. Libertarian and Green Party candidates Johnson and Stein did not have nearly the level of financial support that the two main parties possessed.
Faceting by candidate reinforces the fact that the median donation for Obama is less than that of Romney. Obama has many more donations under $100.
Obama was campaigning for reelection and was therefore the sole representative for the Democratic Party. Romney eventually edged out the other Republican nominees for the candidacy race, which can be reflected in the chart: his contribution count and total dwarf the rest of the Republicans’ numbers.
For minor tier candidates, distribution of counts and totals were similar with the exception of Rick Perry, who achieved a significantly higher average contribution amount than the other candidates of the same tier.
Created line plots of both contribution counts and total amounts. Both plots suggest an exponential relationship leading up to a peak around Election Day.
By taking the log 10 of count and fitting a straight line to this edited chart, we can see that an exponential model fits rather well to the data.
The top zipcodes by count are all located in the Triangle area (27514, 27516, 27517, 27705). However, the top zipcodes by total contribution amount are located in Charlotte (28207, 28211). Interestingly enough, 28207 is not in the top 10 of count even though the total amount donated was the greatest. Looking at the bar plot for mean, we can see that 28207 has an incredibly high average contribution amount (> 600), which indicates a lower count combined with a higher contribution total.
The top five cities by both contribution count and total are identical, with similar distributions across both bar charts. Durham is the only exception, with a significantly higher count but a lower total than Chapel Hill. Looking at top five cities by mean contribution, Durham has the lowest mean, which contributes to its relatively lower total.
The distribution of contributions in Charlotte is very similar to that of the entire state, with Obama significantly leading count and Romney significantly leading total.
It is interesting to note that a significant chunk of contributors are retired. Ignoring the significant portion of contributors who declined to state their profession, the next top contributors are homemakers. As homemakers most likely do not generate very much income, it is feasible to attribute this high contribution total to those who match their contribution with that of their spouse (this is an available and widely used option in campaign donations). After homemakers are contributors that come from highly esteemed careers such as physicians, attorneys, professors, executives, and presidents. Looking at means, Executives and Presidents have significantly higher mean contributions than other jobs.
##
## Pearson's Chi-squared test
##
## data: tbl_zip_test
## X-squared = 6221.28, df = 19, p-value < 2.2e-16
Limited 3 digit zipcodes to between 270 and 289 since all North Carolina zipcodes are within that range. Using a Chi-squared test, we get a p-value of almost 0. Therefore we strongly reject the null hypothesis that party affiliation is independent of zipcode.
The three digit zipcodes 275 and 282, corresponding with Raleigh and Charlotte, respectively, exhibit both highest counts and totals. However, the 275 Raleigh area shows a greater Democratic influence while the 276 Raleigh and 282 Charlotte area has a higher proportion of Republicans than Democrats in the total. We can conclude that in these two areas, Republicans contribute a higher average amount than Democrats, causing this discrepancy.
##
## Pearson's Chi-squared test
##
## data: tbl_city_test
## X-squared = 18362.33, df = 743, p-value < 2.2e-16
Similar procedure done on contributor city and party reveals the same. We reject the null hypothesis that a contributor’s city is independent of party affiliation.
As most of the variables are nominal (most are simply labels for each contributor), it was infeasible to try and take the correlation of any two variables. However, despite the lack of correlation, there are several observations to be made.
For the two main candidates: Barack Obama and Mitt Romney, the amount of contributions (contribution receipt amount) stayed mostly constant throughout the election process until the final month before Election Day. Campaigns probably utilize the most money during that time since it resembles a final push and there is no incentive to withhold spending after elections.
Obama contributors gave less than Romney contributors on average despite having a much higher contribution count. Rick Perry contributors gave an overwhelmingly higher average donation, but the total amount paled in comparison to the two main candidates as Perry did not win the nomination and subsequently dropped out of the race.
The cities and zipcodes with the most contributors were located in the top two metropolitan areas of North Carolina, Charlotte and the Triangle (Raleigh, Durham, Chapel Hill). The distribution of contributions in these cities was identical to that of the total dataset, with Obama garnering the most contributions by far with Romney in second with significantly less.
Looking at occupations and employers, the distribution of contributions is noticeably different with Retired individuals and Homemakers. Homemakers are almost evenly split between donating to Obama and to Romney, while Retired individuals have a slightly higher than usual proportion donated to Romney compared to the grand distribution. Retired contributors gave vastly more total money than any other profession.
Since campaign receipt was the only numerical (discrete) variable, the only way to measure independence between numerical and categorical (nominal) variables was through a Chi-squared Test of Independence. Running one between party affiliation and zipcode showed that the two variables are definitely not independent of each other (p-value close to 0). A similar test between party affiliation and city produced similar results. However, the strength of these relationships is unknown.
Using ANOVA to evaluate grouped means between contribution receipt and nominal variables (party, zipcode, city) would be infeasible in this case since receipts are discrete, not a continuous variable.
We can see huge spikes of daily contributions in April and the few months leading up to Election Day, especially for Mitt Romney. Maybe looking at cumulative sums would be more interesting than daily sums.
Looking at cumulative sums, we can see that Obama begins the campaign with a higher sum, but is overtaken by Romney around April 2012. Romney came to Charlotte, North Carolina on April 18, 2012 to give a speech, so this could be the cause of the giant spike of contributions in that month.
Between Obama and Romney, we can see that Obama received the most donations from the Raleigh area (three-digit code 275 and 276) while Romney received his most donations from Charlotte (280-282). Obama received the second most donations from Durham while Romney received one of the smallest amounts by zipcode from Durham (Durham is a much more liberal county than the rest of the state).
On a smaller scale between the other relevant Republican nominees and libertarian candidate Gary Earl Johnson, Ron Paul led this smaller group with major donations from Raleigh, Charlotte, and Asheville. Rick Perry relied on donations solely from Charlotte. The amount of financial support for third party candidates such as Gary Johnson is similar to that of low-popularity Republican nominees who dropped out of the race early.
Obama received a higher total contribution amount towards the beginning of the campaign, but the efforts of Romney in the final year, including giving a speech in Charlotte, strengthened his run in the final stretch and caused him to pull ahead in terms of financial support. We could already say with certainty through previous chi-square tests that party is not independent of zipcode and city, and these plots further strengthened that claim.
It was surprising to see how much discrepancy there was between the two main candidates in larger cities. North Carolina has been a swing state in recent years, voting Democratic in 2008 but Republican in 2012. The level of contribution to the two parties in 2012 shows public sentiment shifting towards Romney, whether it be through his extra efforts in the state or Obama’s current term in the office preventing his campaign from reaching the same level of notoriety as in 2008. In 2012, contributors from large cities showed more favor towards Romney as compared to before, even if Raleigh as a whole donated more towards Obama. In smaller zipcodes (with the exception of Durham), the amount of support for Romney overwhelmed that of Obama, probably because small towns in North Carolina largely vote conservative.
The only possible models would be time series models between receipt date and contribution receipt counts/totals. However, the date times are unevenly split, which makes modeling this data very difficult without an advanced algorithm.
Examining both contribution count and contribution totals from July 2011 to Election Day (November 6, 2012), suggested that the relationships were exponential. Therefore, both y-axes were subjected to a log10 transformation, and a linear model was fitted to both plots. Since a linear model seems to fit well to both plots after this transformation, we can say that an exponential model is a good fit for this data. The counts and totals exhibit this behavior as the days approach Election Day, since campaigns utilize the most money in the final stretch; there is no incentive to save money for after the election.
From 2011 to 2013, where the bulk of contribution donations occurred, Obama held a slight lead in the earlier months until around April 2012, when Romney campaigned within North Carolina, giving a speech in Charlotte and driving a large boost of contributions. By Election Day, Romney and the Republicans had amassed a much larger cumulative total over the Democrats, and the Republicans won the popular vote in North Carolina.
These bar charts show the distribution of contribution counts and totals for each party in a given three digit NC zipcode. The three digit zipcodes 275 and 282, corresponding with Raleigh and Charlotte, respectively, exhibit both highest counts and totals. However, the 275 Raleigh area shows a greater Democratic influence while the 276 Raleigh and 282 Charlotte area has a higher proportion of Republicans than Democrats in the total. We can conclude that in these two areas, Republicans contribute a higher average amount than Democrats, causing this discrepancy.
The nc data set contained contribution information on almost 160,000 individuals from the 2012 Presidential Election. Although the variables were mostly categorical (nominal) with the exception of contribution receipt amount and date, there was still much examination to be done regarding donation splits among candidates and parties. Eventually, I explored the amount of financial contribution across several variables and was able to visualize the distribution of contribution receipts over several variables such as party, zipcode, city, and occupation. Unfortunately, this data could not be modeled due to the uneven dates; linear interpolation to fill in between dates would not be a feasible solution. However, through manipulating the data and grouping the data by different variables, I was able to discover just how much more funding and support Mitt Romney and the Republicans had than Obama and the Democrats. In addition, I examined the split of donations across zipcodes and cities of North Carolina and its inhabitants’ occupations to gain a better picture of how the state contributed to each party as a whole and in parts. In the future, for better and more in-depth analyses, it would be interesting to explore additional occupational metrics such as annual salary to see if there existed a relationship between annual salary and contribution amount. Another idea would be to examine each county’s voting distribution to see if voting habits and contributions are similar in a given county. Finally, additional information about contributors themselves such as their age, gender, and population of their listed zipcode or city could illuminate trends in these variables and contribution amounts to better predict which sorts of individuals contribute how much and to which party or candidate.